library(tidyverse)
# Read the cleaned movies data; no col_types are given, so readr guesses them
# (the guessed spec is shown in the str() output further down).
movies <- read_csv(file = "Data_Transformations/movies_clean.csv")

# Uncomment to browse the full table in the interactive viewer.
# View(movies)

# Per-column summary statistics for a first look at the data.
movies %>%
  summary()
## budget homepage id keywords
## Min. : 0 Length:4775 Min. : 5 Length:4775
## 1st Qu.: 950000 Class :character 1st Qu.: 8998 Class :character
## Median : 15000000 Mode :character Median : 14536 Mode :character
## Mean : 29214581 Mean : 55988
## 3rd Qu.: 40000000 3rd Qu.: 57206
## Max. :380000000 Max. :459488
##
## original_language original_title overview popularity
## Length:4775 Length:4775 Length:4775 Min. : 0.0004
## Class :character Class :character Class :character 1st Qu.: 4.8066
## Mode :character Mode :character Mode :character Median : 13.1191
## Mean : 21.6173
## 3rd Qu.: 28.4991
## Max. :875.5813
##
## production_companies production_countries release_date
## Length:4775 Length:4775 Min. :1916-09-04
## Class :character Class :character 1st Qu.:1999-06-29
## Mode :character Mode :character Median :2005-09-23
## Mean :2002-12-14
## 3rd Qu.:2011-02-07
## Max. :2017-02-03
##
## revenue runtime spoken_languages status
## Min. :0.000e+00 Min. : 0.0 Length:4775 Length:4775
## 1st Qu.:0.000e+00 1st Qu.: 94.0 Class :character Class :character
## Median :1.947e+07 Median :104.0 Mode :character Mode :character
## Mean :8.274e+07 Mean :107.2
## 3rd Qu.:9.357e+07 3rd Qu.:118.0
## Max. :2.788e+09 Max. :338.0
## NA's :2
## tagline title vote_average vote_count
## Length:4775 Length:4775 Min. : 0.000 Min. : 0.0
## Class :character Class :character 1st Qu.: 5.600 1st Qu.: 55.0
## Mode :character Mode :character Median : 6.200 Median : 238.0
## Mean : 6.114 Mean : 694.3
## 3rd Qu.: 6.800 3rd Qu.: 742.0
## Max. :10.000 Max. :13752.0
##
## release_year genre_Action genre_Adventure genre_Fantasy genre_Science
## Min. :1916 Mode :logical Mode :logical Mode :logical Mode :logical
## 1st Qu.:1999 FALSE:3621 FALSE:3985 FALSE:4351 FALSE:4240
## Median :2005 TRUE :1154 TRUE :790 TRUE :424 TRUE :535
## Mean :2002
## 3rd Qu.:2011
## Max. :2017
##
## genre_Crime genre_Drama genre_Thriller genre_Animation
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:4079 FALSE:2478 FALSE:3501 FALSE:4541
## TRUE :696 TRUE :2297 TRUE :1274 TRUE :234
##
##
##
##
## genre_Family genre_Western genre_Comedy genre_Romance
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:4262 FALSE:4693 FALSE:3053 FALSE:3881
## TRUE :513 TRUE :82 TRUE :1722 TRUE :894
##
##
##
##
## genre_Horror genre_Mystery genre_History genre_War
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:4256 FALSE:4427 FALSE:4578 FALSE:4631
## TRUE :519 TRUE :348 TRUE :197 TRUE :144
##
##
##
##
## genre_Music genre_Documentary genre_Foreign genre_TV
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:4590 FALSE:4665 FALSE:4741 FALSE:4767
## TRUE :185 TRUE :110 TRUE :34 TRUE :8
##
##
##
##
# Structural overview: one line per column with its type and first values,
# followed by the column spec attribute attached by read_csv().
movies %>%
  str()
## spec_tbl_df [4,775 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ budget : num [1:4775] 2.37e+08 3.00e+08 2.45e+08 2.50e+08 2.60e+08 2.58e+08 2.60e+08 2.80e+08 2.50e+08 2.50e+08 ...
## $ homepage : chr [1:4775] "http://www.avatarmovie.com/" "http://disney.go.com/disneypictures/pirates/" "http://www.sonypictures.com/movies/spectre/" "http://www.thedarkknightrises.com/" ...
## $ id : num [1:4775] 19995 285 206647 49026 49529 ...
## $ keywords : chr [1:4775] "[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \""| __truncated__ "[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"name\": \"drug abuse\"}, {\"id\": 911, \"name\": \"exotic "| __truncated__ "[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name\": \"based on novel\"}, {\"id\": 4289, \"name\": \"secr"| __truncated__ "[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853, \"name\": \"crime fighter\"}, {\"id\": 949, \"name\": \""| __truncated__ ...
## $ original_language : chr [1:4775] "en" "en" "en" "en" ...
## $ original_title : chr [1:4775] "Avatar" "Pirates of the Caribbean: At World's End" "Spectre" "The Dark Knight Rises" ...
## $ overview : chr [1:4775] "In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes tor"| __truncated__ "Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with W"| __truncated__ "A cryptic message from Bond’s past sends him on a trail to uncover a sinister organization. While M battles pol"| __truncated__ "Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protec"| __truncated__ ...
## $ popularity : num [1:4775] 150.4 139.1 107.4 112.3 43.9 ...
## $ production_companies: chr [1:4775] "[{\"name\": \"Ingenious Film Partners\", \"id\": 289}, {\"name\": \"Twentieth Century Fox Film Corporation\", \"| __truncated__ "[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"name\": \"Jerry Bruckheimer Films\", \"id\": 130}, {\"name"| __truncated__ "[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"name\": \"Danjaq\", \"id\": 10761}, {\"name\": \"B24\", \"id\": 69434}]" "[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"name\": \"Warner Bros.\", \"id\": 6194}, {\"name\": \"DC E"| __truncated__ ...
## $ production_countries: chr [1:4775] "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}, {\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}]" "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" "[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}, {\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" "[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}]" ...
## $ release_date : Date[1:4775], format: "2009-12-10" "2007-05-19" ...
## $ revenue : num [1:4775] 2.79e+09 9.61e+08 8.81e+08 1.08e+09 2.84e+08 ...
## $ runtime : num [1:4775] 162 169 148 165 132 139 100 141 153 151 ...
## $ spoken_languages : chr [1:4775] "[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]" "[{\"iso_639_1\": \"en\", \"name\": \"English\"}]" "[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"}, {\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_"| __truncated__ "[{\"iso_639_1\": \"en\", \"name\": \"English\"}]" ...
## $ status : chr [1:4775] "Released" "Released" "Released" "Released" ...
## $ tagline : chr [1:4775] "Enter the World of Pandora." "At the end of the world, the adventure begins." "A Plan No One Escapes" "The Legend Ends" ...
## $ title : chr [1:4775] "Avatar" "Pirates of the Caribbean: At World's End" "Spectre" "The Dark Knight Rises" ...
## $ vote_average : num [1:4775] 7.2 6.9 6.3 7.6 6.1 5.9 7.4 7.3 7.4 5.7 ...
## $ vote_count : num [1:4775] 11800 4500 4466 9106 2124 ...
## $ release_year : num [1:4775] 2009 2007 2015 2012 2012 ...
## $ genre_Action : logi [1:4775] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ genre_Adventure : logi [1:4775] TRUE TRUE TRUE FALSE TRUE TRUE ...
## $ genre_Fantasy : logi [1:4775] TRUE TRUE FALSE FALSE FALSE TRUE ...
## $ genre_Science : logi [1:4775] TRUE FALSE FALSE FALSE TRUE FALSE ...
## $ genre_Crime : logi [1:4775] FALSE FALSE TRUE TRUE FALSE FALSE ...
## $ genre_Drama : logi [1:4775] FALSE FALSE FALSE TRUE FALSE FALSE ...
## $ genre_Thriller : logi [1:4775] FALSE FALSE FALSE TRUE FALSE FALSE ...
## $ genre_Animation : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Family : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Western : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Comedy : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Romance : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Horror : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Mystery : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_History : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_War : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Music : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Documentary : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_Foreign : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ genre_TV : logi [1:4775] FALSE FALSE FALSE FALSE FALSE FALSE ...
## - attr(*, "spec")=
## .. cols(
## .. budget = col_double(),
## .. homepage = col_character(),
## .. id = col_double(),
## .. keywords = col_character(),
## .. original_language = col_character(),
## .. original_title = col_character(),
## .. overview = col_character(),
## .. popularity = col_double(),
## .. production_companies = col_character(),
## .. production_countries = col_character(),
## .. release_date = col_date(format = ""),
## .. revenue = col_double(),
## .. runtime = col_double(),
## .. spoken_languages = col_character(),
## .. status = col_character(),
## .. tagline = col_character(),
## .. title = col_character(),
## .. vote_average = col_double(),
## .. vote_count = col_double(),
## .. release_year = col_double(),
## .. genre_Action = col_logical(),
## .. genre_Adventure = col_logical(),
## .. genre_Fantasy = col_logical(),
## .. genre_Science = col_logical(),
## .. genre_Crime = col_logical(),
## .. genre_Drama = col_logical(),
## .. genre_Thriller = col_logical(),
## .. genre_Animation = col_logical(),
## .. genre_Family = col_logical(),
## .. genre_Western = col_logical(),
## .. genre_Comedy = col_logical(),
## .. genre_Romance = col_logical(),
## .. genre_Horror = col_logical(),
## .. genre_Mystery = col_logical(),
## .. genre_History = col_logical(),
## .. genre_War = col_logical(),
## .. genre_Music = col_logical(),
## .. genre_Documentary = col_logical(),
## .. genre_Foreign = col_logical(),
## .. genre_TV = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
# Transposed preview: every column on its own line with its type.
movies %>%
  glimpse()
## Rows: 4,775
## Columns: 40
## $ budget <dbl> 2.37e+08, 3.00e+08, 2.45e+08, 2.50e+08, 2.60e+08,…
## $ homepage <chr> "http://www.avatarmovie.com/", "http://disney.go.…
## $ id <dbl> 19995, 285, 206647, 49026, 49529, 559, 38757, 998…
## $ keywords <chr> "[{\"id\": 1463, \"name\": \"culture clash\"}, {\…
## $ original_language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "…
## $ original_title <chr> "Avatar", "Pirates of the Caribbean: At World's E…
## $ overview <chr> "In the 22nd century, a paraplegic Marine is disp…
## $ popularity <dbl> 150.43758, 139.08262, 107.37679, 112.31295, 43.92…
## $ production_companies <chr> "[{\"name\": \"Ingenious Film Partners\", \"id\":…
## $ production_countries <chr> "[{\"iso_3166_1\": \"US\", \"name\": \"United Sta…
## $ release_date <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, …
## $ revenue <dbl> 2787965087, 961000000, 880674609, 1084939099, 284…
## $ runtime <dbl> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151,…
## $ spoken_languages <chr> "[{\"iso_639_1\": \"en\", \"name\": \"English\"},…
## $ status <chr> "Released", "Released", "Released", "Released", "…
## $ tagline <chr> "Enter the World of Pandora.", "At the end of the…
## $ title <chr> "Avatar", "Pirates of the Caribbean: At World's E…
## $ vote_average <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7,…
## $ vote_count <dbl> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, …
## $ release_year <dbl> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2…
## $ genre_Action <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, …
## $ genre_Adventure <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE,…
## $ genre_Fantasy <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FAL…
## $ genre_Science <lgl> TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TR…
## $ genre_Crime <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FA…
## $ genre_Drama <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
## $ genre_Thriller <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
## $ genre_Animation <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, F…
## $ genre_Family <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, F…
## $ genre_Western <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Comedy <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Romance <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Horror <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Mystery <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_History <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_War <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Music <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Documentary <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_Foreign <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ genre_TV <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
# Keep a handful of columns by name.
movies %>%
  select(id, original_title, budget, popularity)

# Drop a single column with a leading minus.
movies %>%
  select(-keywords)

# Keep a contiguous range of columns with `first:last`.
movies %>%
  select(id, original_title, genre_Action:genre_TV)

# Keep columns by name prefix with a selection helper.
movies %>%
  select(id, original_title, starts_with("genre"))
# movies_sel dataset ----
# Working subset used by the rest of the script: id, the title / original_*
# columns, budget, the vote columns, the release columns, every remaining
# double column, and the genre flags. A column matched by several helpers is
# kept once, at its first match, so where(is.double) only contributes the
# doubles not already selected (popularity, revenue, runtime in the output
# below).
movies_sel <-
  select(
    movies,
    id, contains("title"), contains("original"), budget, contains("vote"),
    starts_with("release"), where(is.double), starts_with("genre")
  )

# filter ----
# Confirm the shape of the subset before filtering it.
glimpse(movies_sel)
## Rows: 4,775
## Columns: 32
## $ id <dbl> 19995, 285, 206647, 49026, 49529, 559, 38757, 99861,…
## $ original_title <chr> "Avatar", "Pirates of the Caribbean: At World's End"…
## $ title <chr> "Avatar", "Pirates of the Caribbean: At World's End"…
## $ original_language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en"…
## $ budget <dbl> 2.37e+08, 3.00e+08, 2.45e+08, 2.50e+08, 2.60e+08, 2.…
## $ vote_average <dbl> 7.2, 6.9, 6.3, 7.6, 6.1, 5.9, 7.4, 7.3, 7.4, 5.7, 5.…
## $ vote_count <dbl> 11800, 4500, 4466, 9106, 2124, 3576, 3330, 6767, 529…
## $ release_date <date> 2009-12-10, 2007-05-19, 2015-10-26, 2012-07-16, 201…
## $ release_year <dbl> 2009, 2007, 2015, 2012, 2012, 2007, 2010, 2015, 2009…
## $ popularity <dbl> 150.43758, 139.08262, 107.37679, 112.31295, 43.92699…
## $ revenue <dbl> 2787965087, 961000000, 880674609, 1084939099, 284139…
## $ runtime <dbl> 162, 169, 148, 165, 132, 139, 100, 141, 153, 151, 15…
## $ genre_Action <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FAL…
## $ genre_Adventure <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TR…
## $ genre_Fantasy <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,…
## $ genre_Science <lgl> TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE,…
## $ genre_Crime <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE…
## $ genre_Drama <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
## $ genre_Thriller <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
## $ genre_Animation <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ genre_Family <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ genre_Western <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Comedy <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Romance <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Horror <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Mystery <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_History <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_War <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Music <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Documentary <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_Foreign <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ genre_TV <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
# Which movies are action movies?
# A logical column is already a valid predicate: keep rows where it is TRUE.
movies_sel %>%
  filter(genre_Action)
# Which movies have a vote average over 7.5?
# Keep rows whose vote average is strictly above 7.5.
movies_sel %>%
  filter(vote_average > 7.5)
# Which action movies have an average voter rating over 7.5?
# Both conditions must hold; `&` is equivalent to passing them to filter()
# as separate arguments.
movies_sel %>%
  filter(genre_Action & vote_average > 7.5)
# How many movies have an original language of English, French, Spanish, or
# Italian ("en", "fr", "es", "it")?
# Long-hand version: one equality test per language, joined with OR.
movies_sel %>%
  filter(
    original_language == "en" |
      original_language == "fr" |
      original_language == "es" |
      original_language == "it"
  )
# Given the numbers 1 through 100, which of those numbers are in 1 through 10?
# %in% returns one TRUE/FALSE per left-hand element: TRUE when that element
# occurs anywhere in the right-hand vector.
seq_len(100) %in% seq_len(10)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE
# %in% replaces the chain of `==` / `|` comparisons with one membership test.
filter(
  movies_sel,
  original_language %in% c("en", "fr", "es", "it")
)

# The same test against a reusable vector of language codes.
# Portuguese is "pt" in ISO 639-1; the previous "po" is not a code in that
# scheme, so it could never match a row.
# NOTE(review): "en" is carried over from the example above although English
# is not a Romance language -- confirm whether it belongs in this vector.
romance_languages <- c("en", "fr", "es", "it", "pt")
filter(
  movies_sel,
  original_language %in% romance_languages
)
# arrange ----
# Which movie was the most expensive to produce, i.e. which movie has the
# highest budget?
# Largest budget first.
movies_sel %>%
  arrange(desc(budget))

# Oldest release year first; within each year, largest budget first.
movies_sel %>%
  arrange(release_year, desc(budget))
# mutate ----
# Which movie has the highest profit?
# Add a profit column (revenue minus budget); rows are not reordered here.
movies_sel %>%
  mutate(profit = revenue - budget)
# How does the runtime for each movie compare to the average runtime for all
# movies?
# Difference of each movie from the overall mean. na.rm = TRUE keeps missing
# values (runtime has 2 NAs in the summary above) from turning the mean into NA.
movies_sel %>%
  mutate(
    runtime_diff = runtime - mean(runtime, na.rm = TRUE),
    budget_diff = budget - mean(budget, na.rm = TRUE)
  )
# summarize ----
# What are the median and mean budget for all movies?
# Collapse the whole table to a single row of budget statistics.
movies_sel %>%
  summarise(
    budget_median = median(budget, na.rm = TRUE),
    budget_mean = mean(budget, na.rm = TRUE)
  )
# group_by / ungroup ----
# What was the average budget by release year?
# Grouping changes no rows; it only records that later verbs should work
# per release_year.
movies_group <- movies_sel %>%
  group_by(release_year)

# One output row per release year.
movies_group %>%
  summarise(
    budget_median = median(budget, na.rm = TRUE),
    budget_mean = mean(budget, na.rm = TRUE)
  )
# Which movie released in 2001 had the highest budget?
# Step-by-step version: each verb stores its result in an intermediate object.
movies_selected <- select(movies, id, original_title, budget, release_year)
movies_filtered <- filter(movies_selected, release_year == 2001)
arrange(movies_filtered, desc(budget))

# %>% ----
# The same steps as one pipeline: the pipe passes the left-hand result as the
# first argument of the next call, so no intermediate objects are needed.
movies_sel %>%
  select(id, original_title, budget, release_year) %>%
  filter(release_year == 2001) %>%
  arrange(desc(budget))

# `x %>% f(y)` is another way of writing `f(x, y)`.
movies_sel %>% select(id)
# Which movie had the highest vote average overall?
# Option 1: sort so the best-rated movies come first.
arrange(movies_sel, desc(vote_average))

# Option 2: keep only the rows that reach the maximum (ties are all kept).
filter(movies_sel, vote_average == max(vote_average))
# Which movie released after 2010 had the highest vote average?
# Restrict to releases after 2010, then sort best-rated first.
arrange(
  filter(movies_sel, release_year > 2010),
  desc(vote_average)
)
# Which movie had the highest vote average among movies released after 2010
# with a vote count greater than the median?
# Both conditions sit in a single filter() call on purpose: the median is
# therefore taken over all movies, not only over the post-2010 ones.
arrange(
  filter(
    movies_sel,
    release_year > 2010 & vote_count > median(vote_count, na.rm = TRUE)
  ),
  desc(vote_average)
)
# Which movie had the highest vote average among movies that made at least
# double their budget?
# Version 1: compare revenue directly against twice the budget.
# Rows with a budget of 0 (the minimum in the summary above) are dropped
# first; otherwise every one of them would pass `revenue >= 0`.
movies %>%
  filter(budget > 0) %>%
  mutate(budget_double = budget * 2) %>%
  filter(revenue >= budget_double) %>%
  arrange(desc(vote_average))

# Version 2: the same criterion expressed as relative return on budget.
# (revenue - budget) / budget >= 1 is equivalent to revenue >= 2 * budget.
# The earlier threshold of 2 required revenue of at least three times the
# budget, so it disagreed with version 1. Dropping budget == 0 also avoids
# dividing by zero.
movies %>%
  filter(budget > 0) %>%
  mutate(revenue_percent = (revenue - budget) / budget) %>%
  filter(revenue_percent >= 1) %>%
  arrange(desc(vote_average))
# How many movies are there for each original language?
# Long form: group, count rows per group with n(), then sort descending.
movies_sel %>%
  group_by(original_language) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  arrange(desc(count))

# Short form: count() groups, tallies and sorts in one call.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
movies_sel %>%
  count(original_language, sort = TRUE, name = "count")
# What was the total budget for each original language?
# Sum the budgets per original language and list the biggest spenders first.
group_by(movies_sel, original_language) %>%
  summarise(budget_total = sum(budget)) %>%
  ungroup() %>%
  arrange(desc(budget_total))
# How did vote averages change for English-language films over time?
# Mean vote average per release year, English-language movies only.
movies_english_over_time <- movies_sel %>%
  filter(original_language == "en") %>%
  group_by(release_year) %>%
  summarise(vote_average = mean(vote_average)) %>%
  ungroup()

# One point per release year.
ggplot(movies_english_over_time, aes(release_year, vote_average)) +
  geom_point()
# How many movie titles match their original title, and how many do not?
# Flag whether the title equals the original title, count each flag value,
# and draw the counts as horizontal bars.
movies_sel %>%
  mutate(title_match_flag = (title == original_title)) %>%
  count(title_match_flag) %>%
  ggplot(mapping = aes(n, title_match_flag)) +
  geom_col()

# The same flag, counted separately for every release year.
count(
  mutate(movies_sel, title_match_flag = (title == original_title)),
  release_year, title_match_flag
)
# Which movie had the highest budget in each release year?
# Within each release year keep the row(s) whose budget equals that year's
# maximum (ties are all kept), then show the years in chronological order.
movies_sel %>%
  group_by(release_year) %>%
  filter(budget == max(budget)) %>%
  ungroup() %>%
  arrange(release_year) %>%
  select(release_year, budget, title)
library(tidyverse)
# Small self-contained sample (six rows, dput()-style) so the plot below can
# be reproduced without the CSV file.
movies_sel_sample <-
  structure(
    list(
      id = c(19995, 285, 206647, 49026, 49529, 559),
      title = c(
        "Avatar", "Pirates of the Caribbean: At World's End",
        "Spectre", "The Dark Knight Rises", "John Carter", "Spider-Man 3"
      ),
      original_language = c("en", "en", "en", "en", "en", "en"),
      vote_average = c(7.2, 6.9, 6.3, 7.6, 6.1, 5.9)
    ),
    row.names = c(NA, -6L),
    class = c("tbl_df", "tbl", "data.frame")
  )

# Mean vote average per original language, drawn as horizontal bars.
# ggplot2 layers are added with `+`. Piping the plot into geom_col() with
# %>% passes the plot object as the layer's first argument (`mapping`) and
# fails, so the final step must use `+`.
movies_sel_sample %>%
  group_by(original_language) %>%
  summarize(vote_average = mean(vote_average, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = vote_average,
             y = original_language)) +
  geom_col()